knitr::opts_chunk$set(echo = TRUE)
# attach every required package; character.only is needed since the names
# are held in a character vector
pkgs <- c("pwr", "dplyr", "ggplot2", "tidyr", "knitr", "kableExtra", "shiny")
invisible(lapply(pkgs, library, character.only = TRUE))
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
# DnD character generation option: roll 4d6, take the top 3
# simulating to check distribution of that approach

# function to simulate dice rolls ==============================================
# inputs:
# - counts: vector for number of rolls
# - sides: vector of sides per die (aligned with counts)
# - seed: number to set random number generator seed for replication
#     default (NULL) sets no RNG seed
# - drop.lowest.n: vector to specify if dropping the lowest 'n' results
#     default (NULL) will drop none for any roll
#     a single number will apply that number to any roll
#     a vector the same length as counts applies each entry to the matching roll set
# output:
# - list with 1 entry per roll set; each entry has the final rolls
#     (excluding any lowest dropped rolls if specified) and sum of the set
#     the final list entry is the sum across all roll sets
roll <- function(counts, sides, seed = NULL, drop.lowest.n = NULL) {
  if (length(counts) != length(sides)) {stop("counts and sides lengths don't match")}
  if (!all(sides %in% c(4, 6, 8, 10, 100, 12, 20))) {
    stop("sides can only contain 4, 6, 8, 10, 100 (%), 12, 20")}
  # only touch the RNG when a seed is supplied; set.seed(NULL) would
  # RE-randomize the generator, which contradicts "sets no RNG seed"
  if (!is.null(seed)) {set.seed(seed)}
  pct_idx <- sides == 100  # percentile (d%) dice get special handling

  # normalize drop.lowest.n into d.l.n, one entry per roll set
  if (is.null(drop.lowest.n)) {
    d.l.n <- rep(0, length(counts))
  } else {
    if (length(drop.lowest.n) == 1L) {
      d.l.n <- rep(drop.lowest.n, length(counts))
    } else if (length(drop.lowest.n) != length(counts)) {
      # length check must come before any elementwise comparison with
      # counts, otherwise mismatched lengths would silently recycle
      stop("drop.lowest.n must be one of: same length as counts, a single number, or NULL")
    } else {
      d.l.n <- drop.lowest.n
    }
    # check feasibility of dropping lowest 'n' rolls in each case
    if (any(d.l.n >= counts)) {stop("drop.lowest.n must be at most counts - 1")}
  }
  if (any(d.l.n < 0)) {stop("drop.lowest.n cannot be negative")}

  roll_set <-
    lapply(seq_along(sides), function(i) {
      if (pct_idx[i]) {
        # d%: a tens die (00-90) plus a ones die (0-9); the 00 + 0
        # combination reads as 100
        ones <- sample(0:9, size = counts[i], replace = TRUE)
        tens <- sample(0:9 * 10, size = counts[i], replace = TRUE)
        rslt <- tens + ones
        rslt[rslt == 0] <- 100
      } else {
        rslt <- sample(1:sides[i], size = counts[i], replace = TRUE)
      }
      # drop the lowest n results if requested (kept rolls stay sorted,
      # matching the original order(rslt) behavior)
      if (d.l.n[i] > 0) {rslt <- sort(rslt)[-seq_len(d.l.n[i])]}
      list("rolls" = rslt, "sum" = sum(rslt))
    })

  names(roll_set) <- paste0(counts, "d", ifelse(pct_idx, "%", sides))
  sums <- vapply(roll_set, function(r) {r$sum}, numeric(1L))

  # final (unnamed) entry: grand total across all roll sets
  roll_set[[length(roll_set) + 1]] <- c("ovr total" = sum(sums))

  roll_set
}

# just curious: impact of 3d4 minus 2 versus 1d10 (same min/max)
# data.frame(setup = c("3d4 - 2 vs. 1d10"),
#            sim_seed = 1:1e4) %>%
#   mutate(
#     results = lapply(1:nrow(.), function(i){
#       roll(counts = c(3, 1), sides = c(4, 10), seed = .$sim_seed[i])
#     })
#   ) %>%
#   mutate(
#     res_3d4mns2 = vapply(1:nrow(.), function(i) {
#       .$results[[i]]$`3d4`$sum - 2}, numeric(1L)),
#     res_1d10 = vapply(1:nrow(.), function(i) {
#       .$results[[i]]$`1d10`$sum}, numeric(1L)),
#   ) %>%
#   pivot_longer(cols = starts_with("res_"),
#                names_to = "roll", names_prefix = "res_",
#                values_to = "result") %>%
#   ggplot(aes(x = result, color = roll)) +
#   geom_density() +
#   scale_x_continuous(breaks = 1:10, minor_breaks = FALSE)

# unsurprising - 3d4 - 2 concentrates results closer to center of results range
# (esp. 4-7); 1d10 is more 'swingy'

1 Analysis Plan

  • Chi-square Goodness of Fit (GoF) Test

  • Bayesian multinomial modeling

2 Dice set notes (listed in order of appearance/testing)

  • ‘white’: Chessex ‘speckled arctic camo’ 7-die set (CHX 25311); mostly white with grey flecks and black numbering, giving it a ‘granite’ appearance
    • urea
  • ‘grey’: Chessex ‘speckled granite’ 7-die set (CHX 25320); white ‘core’ composition with extensive darker grey flecks and some lighter grey flecks, and ‘blood-red’ numbering
    • urea
    • retired from use due to the mottled grey/dark red color scheme being hard to read unless under very bright light; re-inking while trying to keep reddish numbering didn’t help
  • ‘black’: Chessex ‘speckled urban camo’ 7-die set (CHX 25328); dark grey ‘core’ composition with extensive black flecks and yellow numbering
    • urea
  • ‘green’: FanRoll ‘recycled rainbow’ 7-die set; white ‘core’ composition with various-colored flecks throughout and green numbering
    • resin
  • ‘teal1’ and ‘teal2’: Gravity Dice individual d20s; teal with silver numbering (same coloring for each; unfortunately before rolling I did not note this, but one has the 15 at the bottom of its facet ‘triangle’ - otherwise they appear identical; based on subsequent test rolls, I believe the ‘15 label at bottom’ die is ‘teal1’)
    • 6061 aluminum
  • ‘mushroom’: [Etsy-gifted] individual d20; clear with embedded moss, lavender, and a small mushroom and gold numbering
    • resin
  • ‘aubergine’: Die Hard Dice ‘Rerolls’ recycled plastic 7-die set; dark purple (d8 and d20 are closer to ‘plum’, being lighter) with embedded glitter and white numbering
    • acrylic
  • ‘ocean’: TheWizardsVault ‘ocean opal’ 7-die set; translucent cyan with gold foil inclusion and gold numbering
    • resin
  • ‘ice’: FeverDream Dice ‘ice queen’ 7-die set; opaque white with blue/green microglitter and gold foil inclusion and pastel light blue numbering
    • resin, sharp-edged (all others thus far are round-edged)
    • unique among sets thus far, the d4 is a bipyramid (‘shard’) rather than a tetrahedron
  • ‘infernal’: Die Hard Dice 7-die set (‘Elessia Kybr - Inquisitor with Gold’); lustrous cardinal red mixed with black, with gold numbering
    • acrylic
  • ‘blue’: unknown manufacturer 7-die set, received as a gift; believe it is Sage’s Portal ‘Sage’s Labyrinths’ blue weave white ink hollow metal polyhedral set; metallic ‘pure blue’ hollow metal set with maze-like perforations and white ink
    • metal
  • ‘jade’: Fennek & Finch ‘Jade’ 7-die set; rich green swirled/blended with white to closely mimic jade (/serpentine), with silver font
    • acrylic
  • ‘expanse’: Fennek & Finch ‘Endless Expanse’ 8-die set; ink-black dice with extensive dark-blue microglitter giving impression of ‘infinite depth’, with copper font
    • acrylic
    • 8th die is an extra, stylized d6 (star / cat paw / ‘cat wizard’ pips for side 1 / 2-5 / 6)
  • ‘light rainbow’: Phoenixville Dice 7-die set; blended colors (dark blue, light blue, green-yellow, red and some mixes thereof) with slight metallic sheen and white font
    • resin

3 Rolling Surface

  • Initial rolls (white, grey, black, most green rolls): FanRoll velvet dice tray with leather backing (‘rainbow watercolor’); 10” x 10” dimensions, moderate padding

  • Later rolls (rest of green, teal[1/2], mushroom, aubergine, ocean, ice, infernal, blue, jade, expanse; light rainbow rolls): GameGenic Tokens’ Lair magnetic dice tray; 6.5” x 3.75” dimensions, less padding

    • Starting with the ‘ice’ dice, a uniform layer of cork was added to the tray to provide better cushioning for the sharp-edge dice

4 Power Analysis to Determine Sample Size to Vet Dice

# dice-set labels, in order of appearance/testing
sets <- c("white", "grey", "black", "green", "teal1", "teal2", "mushroom",
          "aubergine", "ocean", "ice", "infernal", "blue", "jade", "expanse",
          "lt.rainbow")
# die labels; "%" denotes the percentile die
dice <- sprintf("d%s", c(4, 6, 8, 10, "%", 12, 20))
alpha <- 0.05 # P(reject null[fair die] | actually fair die)
pwr <- 0.80 # power: P(reject null | alt [stated bias exists] is true)

# all distinct face values that can appear across the dice above
side_levels <- c(0:20, seq(30, 90, by = 10))

# null hypothesis: equal probability for each side
# alternative hypothesis: one side is 'preferred' (100 * alt_pref)% more
#  (by dice structure, that means opposite side dice is 'disliked' by same %)
#  (I'm assuming this holds true for d4 too)
alt_pref <- 0.33333

# build one row per (set, die) pair, then compute the chi-square GoF effect
# size and required sample size for the stated alternative (one preferred
# side, its opposite equally disliked)
dice_setup <- 
  data.frame(
    set = rep(sets, each = length(dice)),
    die = rep(dice, times = length(sets))
    ) %>% 
  # only d20 for teal1/teal2/mushroom 'set's
  filter(!((grepl("teal", set) | grepl("mushroom", set)) & 
           die %in% paste0("d", c(4, 6, 8, 10, "%", 12)))) %>% 
  # add a row for the stylized 'expanse' d6
  bind_rows(
    ., 
    data.frame(set = "expanse", die = "d6", note = "stylized d6")
  ) |> 
  # NOTE(review): |> above vs. %>% elsewhere -- works, but inconsistent style
  mutate(
    # d% is analyzed as a 10-outcome die
    sides_char = ifelse(grepl("\\%", die), 10, sub("d", "", die)),
    sides = as.numeric(sides_char)
  ) %>% 
  select(-sides_char) %>% 
  # note: need separate mutate() calls when the previous mutate-created 
  #       content is needed to create subsequent multi-step entry 
  mutate(
    # null: uniform probability vector of length `sides`
    null = lapply(1:nrow(.), function(i) {
      rep(1 / .$sides[i], times = .$sides[i])
    }),
    # alt: one side up-weighted by alt_pref, one down-weighted by the same,
    # remaining probability spread evenly over the other sides
    alt = lapply(1:nrow(.), function(i) {
      eq_prob <- 1 / .$sides[i]
      prefr <- (1 + alt_pref) * eq_prob
      dislk <- (1 - alt_pref) * eq_prob
      rest <- (1 - (prefr + dislk)) / (.$sides[i] - 2)
      
      c(prefr, dislk, rep(rest, .$sides[i] - 2))
    })
  ) %>% 
  mutate(
    # Cohen's w for the null vs. alternative probability vectors (pwr pkg)
    effect_size = vapply(1:nrow(.), function(i) {
      ES.w1(P0 = .$null[[i]], P1 = .$alt[[i]])
    }, numeric(1L))
  ) %>% 
  mutate(
    # solve pwr.chisq.test for N at the stated alpha/power, then round up
    sample_size = vapply(1:nrow(.), function(i) {
      N_ <- 
      pwr.chisq.test(w = .$effect_size[i], 
                     N = NULL,
                     df = .$sides[i] - 1, 
                     sig.level = alpha,
                     power = pwr)$N
      
     ceiling(N_) # round up
    }, numeric(1L))
  )

5 Checking Dice

5.1 Data Collection

Notes on data collection:

  • Rolls must land in the dice tray; any roll outside it will be rejected (this includes rolls hitting the edge of the dice tray and landing outside the tray, rolls landing on the dice tray but then bouncing out, and rolls landing outside the dice tray but then bouncing in; rolls hitting the dice tray side but no other object and landing inside the tray will be included)

  • Rolls must be spun in the air; any roll (subjectively) deemed to have been insufficiently ‘spun’ will be rejected (i.e. it felt like I basically just dropped the die instead of spinning it in the air)

  • Dice are rolled one at a time; each result is recorded before picking up the die to conduct the next roll

  • If the die doesn’t come to rest fully on one face (i.e. one edge of the die is resting against the side of the dice tray), the die must be (subjectively) deemed completely unambiguous as to which side would be facing up if the tray side were not there for the roll to be recorded

  • If the die is rotated while picking it up and before the roll result is noted, the die should be rolled twice without recording the results before resuming recording of roll results (to potentially reduce the risk of bias in the recorded result, should one roll have an association with the prior one)

  • The roll technique is described below:

  • The die starts sitting upright in the open right hand, only slightly cupped with the die generally resting around the base of the middle and ring fingers

  • The roll is an upward and ‘counterclockwise’ (wrist rotates the pinky to the right) twist, allowing the die to roll from the hand; generally this means the die rolls off the pinky, or occasionally the ring finger

  • After each roll is recorded, the die is simply picked up and re-rolled; no deliberate care is made to randomize or control the up-facing side of the die (generally it seems to differ from the previous roll’s result due to the picking up action)

# quick plotting function
# quick plotting function: bar chart of one set/die's roll results with
# reference lines for the expected per-side count and the observed mean
# inputs:
# - df: data frame of rolls with a numeric `result` column; the *name* of
#       the data frame must follow the '<set>_<die>_df' convention
#       (e.g. ice_d10_df), since set/die labels are parsed from it
# - fill_color: bar fill color
# output: a ggplot object
plot_result <- function(df, fill_color = "dodgerblue2") {
  set_die <- deparse(substitute(df)) # store df name as text
  # parse set and die out of the name; fixed: inside [ ] the '|' is a
  # literal character (not alternation), so the class is just [0-9%]
  set_ <- sub("^([a-z]+.{0,1}[a-z]*)_d[0-9%]{1,2}_df$", "\\1", set_die)
  die_ <- sub("^[a-z]+.{0,1}[a-z]*_(d[0-9%]{1,2})_df$", "\\1", set_die)
  
  # expected count per side under a fair die
  avg_count <- (1 / length(unique(df$result))) * nrow(df)
  
  ggplot(df, aes(x = result)) +
    geom_vline(xintercept = mean(df$result), linetype = "solid") +
    geom_hline(yintercept = avg_count, linetype = "solid") +
    geom_bar(fill = fill_color, color = "black") +
    labs(title = paste("Plot of", set_, die_, "results"),
         subtitle = paste("n = ", nrow(df)),
         caption = paste0("Horizontal line indicates expected count (",
                          sprintf("%.1f", avg_count), 
                          "); Vertical line indicates mean value (",
                          sprintf("%.1f", mean(df$result)),")")) +
    scale_x_continuous(breaks = unique(df$result)) +
    theme_bw()
}


# fill colors keyed by dice-set label; used by all plots below
set_fill_map <- setNames(
  c("ivory3", "grey40", "grey10", "green4", "darkcyan", "steelblue4",
    "gold3", "#614051", "cyan2", "powderblue", "red3", "blue",
    "seagreen", "#07054B", "greenyellow"),
  c("white", "grey", "black", "green", "teal1", "teal2", "mushroom",
    "aubergine", "ocean", "ice", "infernal", "blue", "jade", "expanse",
    "lt.rainbow")
)

5.1.1 White Dice

5.1.2 Grey Dice

5.1.3 Black Dice

5.1.4 Green Dice

5.1.5 Individual d20s

5.1.6 Aubergine Dice

5.1.7 Ocean Dice

5.1.8 Ice Dice

5.1.9 Infernal Dice

5.1.10 Blue Dice

5.1.11 Jade Dice

5.1.12 Expanse Dice

5.2 Light Rainbow Dice

6 Aggregated Analyses

# loading the above dice-roll data
# assumes dice_rolls.csv has (at least) set / die / result columns, as used
# by every pipeline below -- TODO confirm against the collection chunks
dice_roll_df <- 
  read.csv(file = "dice_rolls.csv")

# find runs of repeated values in a vector
# inputs:
# - x: vector to scan for runs
# - streaks.only: if TRUE, keep only runs longer than 1 (actual streaks)
# output: data.frame with one row per run (value, length)
find_run_lengths <- function(x, streaks.only = TRUE){
  runs <- rle(x)
  out <- data.frame(value = runs$values,
                    length = runs$lengths)
  if (!streaks.only) {
    return(out)
  }
  out[out$length > 1L, ]
}

# function for multinomial proportion modelling
# draw from a Dirichlet distribution (adapted from the LearnBayes package)
# inputs:
# - n: number of random probability vectors to draw
# - par: vector of Dirichlet concentration parameters (length k)
# output: n x k matrix; rows are non-negative and sum to 1
rdirichlet <-
  function(n, par) {
    k <- length(par)
    # draw all gamma variates at once; column i uses shape par[i].
    # rep(par, each = n) fills column-major, so the draw order matches the
    # original per-column loop and seeded results are unchanged
    z <- matrix(rgamma(n * k, shape = rep(par, each = n)),
                nrow = n, ncol = k)
    # normalize each row onto the probability simplex
    z / rowSums(z)
  }
# seed is needed because expected_sd below is *estimated* by sampling
set.seed(42)
# per (set, die): nest the raw rolls and compute summary stats, the
# chi-square GoF test, and streak tables
dice_roll_df_ <- 
  dice_roll_df %>% 
  mutate(set = factor(set, levels = sets),
         
         die = factor(die, levels = c(paste0("d", c(4, 6)), "stylized d6",
                                      paste0("d", c(8, 10, "%", 12, 20))))
  ) %>% 
  nest_by(set, die) %>% 
  mutate(
    # strip the 'd' prefix to get the numeric die size as text
    die_val = sub("d|stylized d", "", die),
    # NOTE: as.numeric() is evaluated for every row, including "%", which
    # produces the "NAs introduced by coercion" warnings reported below;
    # the "%" rows still get 100 from the first case
    die_val = case_when(
      die_val == "%" ~ 100,
      #die_val == "stylized d6" ~ 6,
      TRUE ~ as.numeric(die_val)),
    n_obs = length(data$result),
    # d% has 10 distinct faces (10, 20, ..., 100), not 100
    expected_count_per_side = 
      case_when(die_val == 100 ~ n_obs / 10,
                TRUE ~ n_obs / die_val),
    mean_ = mean(data$result),
    expected_mean = 
      case_when(die_val == 100 ~ mean(0:9 * 10),
                die_val == 10 ~ mean(1:10),
                TRUE ~ mean(1:die_val)),
    sd_ = sd(data$result),
    # expected_sd is a Monte Carlo estimate (simulated fair die of the same
    # n), not the analytic SD -- hence the set.seed(42) above
    expected_sd = 
      case_when(die_val == 100 ~ sd(sample(0:9 * 10, n_obs, replace = TRUE)),
                die_val == 10 ~ sd(sample(1:10, n_obs, replace = TRUE)),
                TRUE ~ sd(sample(1:die_val, n_obs, replace = TRUE))),
    result_tbl = list(table(data$result)),
    chisq_pval = chisq.test(result_tbl)$p.value,
    chisq_stat_sig_0.05 = chisq_pval <= 0.05,
    roll_streaks = list(find_run_lengths(data$result, streaks.only = TRUE)),
    roll_streak_tbl = list(table(roll_streaks))
    )
## Warning: There were 12 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `die_val = case_when(die_val == "%" ~ 100, TRUE ~
##   as.numeric(die_val))`.
## ℹ In row 5.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 11 remaining warnings.

6.1 ‘Fairest’ and ‘Least Fair’

# checking which dice per side across sets have largest p-values
# rank sets within each die type by descending chi-square p-value and keep
# the five 'most fair' (largest p-value) per die
fairest_dice_top5 <- 
  dice_roll_df_ %>% 
  group_by(die) %>% 
  arrange(desc(chisq_pval)) %>% 
  mutate(p_val_rank = row_number()) %>% 
  ungroup() %>% 
  filter(p_val_rank <= 5) %>% 
  select(set, die, chisq_pval, p_val_rank) %>% 
  arrange(die, p_val_rank, set)

# same ranking as above, but keep the bottom three ranks per die type
# (i.e. the three smallest p-values: least 'fair')
fairest_dice_bot3 <- 
  dice_roll_df_ %>% 
  group_by(die) %>% 
  arrange(desc(chisq_pval)) %>% 
  mutate(p_val_rank = row_number(), max_rank = n()) %>% 
  ungroup() %>% 
  filter(p_val_rank >= max_rank - 2) %>% 
  select(set, die, chisq_pval, p_val_rank) %>% 
  arrange(die, desc(p_val_rank), set)

# table formatting
format_table <- function(x){
  x |> 
  kbl(format = "html") |>
  kable_styling(full_width = FALSE, 
                bootstrap_options = c("striped", "bordered"))
}

noquote("Fairest Dice - Top 5")
## [1] Fairest Dice - Top 5
fairest_dice_top5 |> 
  mutate(chisq_pval = round(chisq_pval, 3)) |> 
  format_table()
set die chisq_pval p_val_rank
grey d4 0.900 1
aubergine d4 0.881 2
expanse d4 0.745 3
infernal d4 0.697 4
blue d4 0.660 5
blue d6 0.981 1
aubergine d6 0.802 2
green d6 0.612 3
jade d6 0.607 4
lt.rainbow d6 0.596 5
expanse stylized d6 0.107 1
aubergine d8 0.982 1
grey d8 0.910 2
jade d8 0.882 3
expanse d8 0.876 4
lt.rainbow d8 0.704 5
grey d10 0.829 1
white d10 0.804 2
black d10 0.793 3
green d10 0.556 4
lt.rainbow d10 0.316 5
lt.rainbow d% 0.893 1
grey d% 0.624 2
infernal d% 0.500 3
white d% 0.363 4
black d% 0.305 5
white d12 0.507 1
infernal d12 0.412 2
green d12 0.393 3
ice d12 0.387 4
black d12 0.321 5
lt.rainbow d20 0.770 1
infernal d20 0.126 2
aubergine d20 0.097 3
expanse d20 0.003 4
ice d20 0.002 5
noquote("Fairest Dice - Sets by # Appearances and Average Ranking in Top 5")
## [1] Fairest Dice - Sets by # Appearances and Average Ranking in Top 5
fairest_dice_top5 |> 
  group_by(set) |> 
  summarize(top5_ct = n(), 
            avg_rank = mean(p_val_rank) |> round(3), 
            .groups = "drop") |> 
  arrange(desc(top5_ct), avg_rank) |> 
  format_table()
set top5_ct avg_rank
lt.rainbow 5 3.400
grey 4 1.500
aubergine 4 2.000
infernal 4 2.750
expanse 4 3.000
white 3 2.333
green 3 3.333
black 3 4.333
blue 2 3.000
jade 2 3.500
ice 2 4.500
# light rainbow, then grey, then infernal and aubergine, then expanse, 
#   then black, then white, then green 
# (descending order of 'fairness')
# aside from d20s, no real concerns of fairness for any observations
# winning by a country mile, Phoenixville Dice light rainbow d20! seriously cool beans...
# infernal and aubergine d20 are very 'fair'! All Hail Die Hard Dice...
# surprised the expanse and ice d20 are third / fourth fairest...

noquote("Least Fair Dice - Bottom 3")
## [1] Least Fair Dice - Bottom 3
fairest_dice_bot3 |> 
  mutate(chisq_pval = round(chisq_pval, 3)) |>
  format_table()
set die chisq_pval p_val_rank
lt.rainbow d4 0.515 12
ice d4 0.588 11
jade d4 0.597 10
ocean d6 0.024 12
white d6 0.068 11
infernal d6 0.136 10
expanse stylized d6 0.107 1
green d8 0.040 12
ice d8 0.170 11
ocean d8 0.351 10
blue d10 0.000 12
ocean d10 0.051 11
ice d10 0.070 10
ice d% 0.004 12
jade d% 0.034 11
blue d% 0.052 10
ocean d12 0.013 12
aubergine d12 0.079 11
jade d12 0.091 10
green d20 0.000 15
white d20 0.000 14
black d20 0.000 13
noquote("Least Fair Dice, Arranged by p-value")
## [1] Least Fair Dice, Arranged by p-value
fairest_dice_bot3 |> 
  arrange(chisq_pval) |> 
  mutate(chisq_pval = round(chisq_pval, 3)) |>
  format_table()
set die chisq_pval p_val_rank
green d20 0.000 15
white d20 0.000 14
black d20 0.000 13
blue d10 0.000 12
ice d% 0.004 12
ocean d12 0.013 12
ocean d6 0.024 12
jade d% 0.034 11
green d8 0.040 12
ocean d10 0.051 11
blue d% 0.052 10
white d6 0.068 11
ice d10 0.070 10
aubergine d12 0.079 11
jade d12 0.091 10
expanse stylized d6 0.107 1
infernal d6 0.136 10
ice d8 0.170 11
ocean d8 0.351 10
lt.rainbow d4 0.515 12
ice d4 0.588 11
jade d4 0.597 10
# green d20 is clearly not random, nor is white d20
# for other-sided dice even the 'worst' performers are solid except 
# 'ice' d10 is slightly biased against '0', EV is ok (5.4 vs. 'fair' 5.5)
# 'ice' d% is very unlikely to roll 00, otherwise looks great
# 'ocean' d12 ... expected value (6.3) isn't wildly off 'random' EV (6.5)
# 'ocean' d6  ... expected value (3.5) exactly matches 'random' EV, 
#   just more 3s than 1s
# 'blue d%' skews toward 80s/90s and away from 0s/10s - expected value is high
# 'jade d%' has more 10s than 80s (expected value is slightly low), otherwise not too bad

6.2 Mean and SD of Observed Rolls

# observed vs. expected mean / SD
# reshape to long: one row per (set, die, observed/expected) for the mean...
dice_mean_df_tbl <- 
  dice_roll_df_ %>% 
  select(set, die, observed_mean = mean_, expected_mean) %>% 
  mutate("pct_diff" = (observed_mean - expected_mean) / expected_mean * 100) %>% 
  pivot_longer(cols = ends_with("_mean"), names_to = "mean", values_to = "value") %>% 
  mutate(type = sub("_mean", "", mean), var = "mean") %>% 
  select(-mean)

# ...and the same for the SD
dice_sd_df_tbl <- 
  dice_roll_df_ %>% 
  select(set, die, observed_sd = sd_, expected_sd) %>% 
  mutate("pct_diff" = (observed_sd - expected_sd) / expected_sd * 100) %>% 
  pivot_longer(cols = ends_with("_sd"), names_to = "sd", values_to = "value") %>% 
  mutate(type = sub("_sd", "", sd), var = "sd") %>% 
  select(-sd)

# joining on every column means no rows actually match (var differs), so
# this full_join just stacks the mean rows and sd rows into one table
dice_mean_sd_tbl <- 
  full_join(dice_mean_df_tbl, dice_sd_df_tbl, 
            by = c("set", "die", "var", "type", "value", "pct_diff")) %>% 
  mutate(gt_8pct_diff = abs(pct_diff) > 8)

# one mean/SD comparison plot per dice set, faceted by die and by mean/sd;
# points >8% off from expected are highlighted in orange
mean_sd_by_set_plots <- 
  lapply(sets, function(set_name) {
    set_rows <- dice_mean_sd_tbl[dice_mean_sd_tbl$set == set_name, ]
    ggplot(data = set_rows,
           aes(x = type, ymin = 0, ymax = value, y = value,
               color = gt_8pct_diff)) +
      facet_grid(rows = vars(die), cols = vars(var), scales = "free_y") +
      geom_pointrange() +
      labs(title = "Expected vs. Observed Mean/SD of Dice Rolls",
           subtitle = paste(set_name, "dice"),
           color = ">8% diff.\nvs. expected") +
      scale_color_manual(values = c("FALSE" = "grey30", "TRUE" = "orange3")) +
      theme_bw()
  })

# focus on cases of interest
# one plot per (set, die) whose observed mean or SD differs >8% from expected
focus_mean_sd_plots <- 
  dice_mean_sd_tbl %>% 
  filter(gt_8pct_diff) %>% 
  nest_by(set, die) %>% 
  mutate(
    plot_ = list(
      ggplot(data = data,
           aes(x = type, ymin = 0, ymax = value, y = value)) +
      facet_wrap(facets = vars(var), scales = "free_y", ncol = 2) +
      geom_pointrange(color = "orange3") +
      labs(title = "Expected vs. Observed Mean/SD of Dice Rolls",
           subtitle = paste(set, die)) +
        theme_bw()
    )
  ) %>% 
  pull(plot_)
# print every per-set plot; heading emitted once before the first plot
for (i in seq_along(mean_sd_by_set_plots)) {
  if (i == 1) {cat("All Mean/SD Plots\n\n")}
  cat("\n\n")
  print(mean_sd_by_set_plots[[i]])
}

All Mean/SD Plots

# print the flagged (>8% difference) plots; heading emitted once up front
for (i in seq_along(focus_mean_sd_plots)) {
  if (i == 1) {cat("8+% difference Mean/SD Plots\n\n")}
  cat("\n\n")
  print(focus_mean_sd_plots[[i]])
}

8+% difference Mean/SD Plots

6.3 Roll Streaks

# streak-length fill colors: blues for short streaks (1-3), reds for long
streak_color_map <- 
  c("1" = "blue4", "2" = "blue2", "3" = "dodgerblue3",
    "4" = "firebrick1", "5" = "firebrick", "6" = "red4")

# one bar chart per (set, die) row: streak counts by face value
# NOTE(review): 1:nrow(...) would misbehave on an empty frame; seq_len is safer
dice_streaks_plots <- 
  lapply(1:nrow(dice_roll_df_), function(i) {
    # df_0 is a 2-way table (value x streak length); rebuild it as a long
    # data frame -- rep(rownames, times = ncol) / rep(colnames, each = nrow)
    # matches R's column-major flattening used by as.integer(df_0) below
    df_0 <- dice_roll_df_$roll_streak_tbl[[i]]
    df_ <- data.frame(value = rep(rownames(df_0), times = ncol(df_0)),
                      strk_len = rep(colnames(df_0), each = nrow(df_0)))
    # factor with integer-sorted levels so the x axis orders numerically
    df_$value <- as.factor(as.integer(df_$value))
    
    df_$count <- as.integer(df_0)

    ggplot(df_, aes(x = value, y = count, fill = strk_len)) +
      geom_col(position = position_dodge(width = 0.7)) +
      labs(title = "Streak length by value",
           subtitle = paste(dice_roll_df_$set[i], dice_roll_df_$die[i])) +
      scale_fill_manual(values = streak_color_map) +
      theme_bw()
  })

# baseline for comparison: streak counts from simulated *fair* d20 rolls
# (1851 rolls per simulation -- matching the observed d20 sample size --
# repeated 100 times with seed = i)
streak_sim <- 
  lapply(1:100, function(i) {
    roll_sim <- roll(counts = 1851, sides = 20, seed = i)[[1]]$rolls
    rle_ <- find_run_lengths(roll_sim, streaks.only = TRUE)
    rle_
  }) %>% 
  bind_rows() %>% 
  table()

# convert the 2-way table to a long data frame (same column-major
# reconstruction as in dice_streaks_plots above)
streak_sim_df <- 
  data.frame(value = rep(rownames(streak_sim), times = ncol(streak_sim)),
             strk_len = rep(colnames(streak_sim), each = nrow(streak_sim)))
streak_sim_df$value <- as.factor(as.integer(streak_sim_df$value))
streak_sim_df$count <- as.integer(streak_sim)

streak_sim_plot <- 
  ggplot(streak_sim_df, aes(x = value, y = count, fill = strk_len)) +
  geom_col(position = position_dodge(width = 0.7)) +
  labs(title = "Streak length by value",
       subtitle = "Simulated d20 rolls",
       caption = "1,851 rolls simulated 100 times") +
  scale_fill_manual(values = streak_color_map) +
  theme_bw()

# hard-coded row indices into dice_roll_df_ selecting plots to highlight;
# presumably the d20 rows plus other notable cases -- TODO confirm these
# stay valid if rows are added/removed upstream
dice_streak_focus <- c(7, 14, 21, 28, 29, 30, 31, 38, 40, 45, 52, 59,
                       63, 66, 68, 73, 81, 88)

# subset of the streak plots flagged above for closer review
focused_streak_plots <- dice_streaks_plots[dice_streak_focus]
cat("\n\nSimulated d20 rolls for comparison\n\n")

Simulated d20 rolls for comparison

print(streak_sim_plot)

cat("\n\nSelected Streak Plots\n\n")

Selected Streak Plots

# print each selected streak plot, separated by blank lines for knitr output
for (i in seq_along(focused_streak_plots)) {
  cat("\n\n")
  print(focused_streak_plots[[i]])
}

6.4 Observed vs. Expected for Stat. Sig. Dice

Note: The ‘aubergine’ and ‘infernal’ d20 are (amazingly) not statistically significant; they’re included for comparison against the other d20s, all of which were stat. sig. in the chi-squared test.

# plots of (Chi square HoV) stat-sig dice - side-by-side for same-die sets
# keep stat. sig. dice, plus the three named d20s for comparison (see the
# note above this chunk), then re-nest by die so each row is one die type
comparison_plots_df <- 
  dice_roll_df_ %>%
  filter(chisq_stat_sig_0.05 |
         (set %in% c("aubergine", "infernal", "lt.rainbow") & die == "d20")) %>% 
  nest_by(die)

# bar outline colors for the comparison plots: only 'white' and
# 'lt.rainbow' get an outline; every other set is NA (no outline)
set_color_map <- local({
  all_sets <- c("white", "grey", "black", "green", "teal1", "teal2",
                "mushroom", "aubergine", "ocean", "ice", "infernal",
                "blue", "jade", "expanse", "lt.rainbow")
  cmap <- setNames(rep(NA_character_, length(all_sets)), all_sets)
  cmap[c("white", "lt.rainbow")] <- c("black", "navy")
  cmap
})

# one dodged bar chart per die type, overlaying all retained sets
comparison_plots <- 
  lapply(1:nrow(comparison_plots_df), function(i) {
    df_ <- comparison_plots_df[i,]
    # df_$data holds the per-set summary rows for this die; each of those
    # rows nests its raw rolls in a `data` column -- unpack and stack them,
    # tagging each roll with its set (rep'd by that set's n_obs)
    summary_data <- df_$data
    results_data <- 
      lapply(summary_data, `[[`, "data") %>% 
      bind_rows() %>% 
      mutate(set = rep(summary_data[[1]]$set, times = summary_data[[1]]$n_obs))
    
    x_vals <- unique(results_data$result)
    x_vals <- x_vals[order(x_vals)]
    
    ggplot(results_data, aes(x = result, fill = set, color = set)) +
      # dashed reference: expected per-side count under a fair die
      geom_hline(data = summary_data[[1]], 
                 aes(yintercept = expected_count_per_side),
                 linetype = "dashed") +
      geom_bar(position = "dodge") +
      labs(title = paste("Observed Counts for", df_$die, "Rolls by Set"),
           subtitle = "Horizontal line: expected roll count (fair die)") +
      scale_x_continuous(breaks = x_vals, minor_breaks = FALSE) +
      scale_color_manual(values = set_color_map, guide = "none") +
      scale_fill_manual(values = set_fill_map) +
      theme_bw()
  })
# print all but the last plot in the loop, then the last one separately --
# presumably for knitr figure placement; NOTE(review): 1:(n - 1) breaks if
# there is only one plot (seq_len(length(...) - 1) would be safe)
for (i in 1:(length(comparison_plots) - 1)) {
  cat("\n\n")
  print(comparison_plots[[i]])
}

cat("\n\n")
print(comparison_plots[[length(comparison_plots)]])

6.5 Checking Stat. Sig. d20 Results If Collecting Smaller Samples

# simulating p-values from subsampling observed rolls - checking if
#  stat. sig. p-vals are more a result of large sample size
# repeatedly subsample observed rolls (without replacement) and record the
# chi-square GoF p-value per subsample, to see whether significance is
# driven by the large full-sample n
# inputs:
# - data: data frame with the grouping column and the value column
# - subset_var: name of the column that splits data into groups
# - value_var: name of the column holding the roll results
# - nsim: number of subsamples per group
# - n: subsample size
# - seed: base RNG seed; row i uses seed + i
# output: nsim x (groups + 1) matrix; col 1 is the seed used, remaining
#         cols are p-values named by group label
sample_chisq_pval <- 
  function(data, subset_var, value_var, nsim = 1e4, n = 500, seed = 42) {
    # split once up front: one entry per level of subset_var, carrying the
    # level label and its vector of values (label taken from subset_var
    # itself -- previously hard-coded to a column literally named "set")
    subset_levels <- unique(data[[subset_var]])
    data_sets <- lapply(seq_along(subset_levels), function(i) {
      in_level <- data[[subset_var]] == subset_levels[i]
      list("set" = subset_levels[i], "data" = data[in_level, value_var])
    })
    
    p_val_mat <- 
      matrix(nrow = nsim, ncol = length(data_sets) + 1,
             dimnames = list(NULL, 
                             c("seed", 
                               vapply(data_sets, `[[`, character(1L), 1)))
             )
    
    for (i in seq_len(nrow(p_val_mat))) {
      for (j in 2:ncol(p_val_mat)) {
        # NOTE(review): the seed is reset before *every* column, so all
        # groups in row i draw from the same RNG stream; preserved as-is,
        # but confirm that pairing is intentional
        set.seed(seed + i)
        p_val_mat[i, 1] <- seed + i 
        p_val_mat[i, j] <- 
          chisq.test(
            table(
              sample(data_sets[[j - 1]]$data, size = n, replace = FALSE)
              )
            )$p.value
      }
    }
    p_val_mat
  }


# run the subsampling experiment for each of three subsample sizes
# NOTE(review): the three calls and the three summary pipelines below are
# identical except for n -- a small helper (or lapply over n) would remove
# the duplication
d20_sample_sim_n1000 <- 
  sample_chisq_pval(
    data = dice_roll_df[dice_roll_df$die == "d20", c("set", "result")],
    subset_var = "set",
    value_var = "result",
    nsim = 5e3, n = 1000, seed = 42
  )

d20_sample_sim_n750 <- 
  sample_chisq_pval(
    data = dice_roll_df[dice_roll_df$die == "d20", c("set", "result")],
    subset_var = "set",
    value_var = "result",
    nsim = 5e3, n = 750, seed = 42
  )

d20_sample_sim_n500 <- 
  sample_chisq_pval(
    data = dice_roll_df[dice_roll_df$die == "d20", c("set", "result")],
    subset_var = "set",
    value_var = "result",
    nsim = 5e3, n = 500, seed = 42
  )

# summarize each matrix to the 10/50/90% p-value quantiles per set
# (quantile rownames "10%"/"50%"/"90%" become the `quantile` column)
d20_sample_sim_n1000_df <- 
  apply(d20_sample_sim_n1000, 2, quantile, probs = c(0.10, 0.50, 0.90)) %>% 
  as.data.frame() %>% 
  select(-seed) %>% 
  mutate(quantile = rownames(.)) %>% 
  pivot_longer(cols = all_of(sets), names_to = "set", values_to = "value") %>% 
  mutate(n = 1000) %>% 
  pivot_wider(names_from = "quantile", values_from = "value")

d20_sample_sim_n750_df <- 
  apply(d20_sample_sim_n750, 2, quantile, probs = c(0.10, 0.50, 0.90)) %>% 
  as.data.frame() %>% 
  select(-seed) %>% 
  mutate(quantile = rownames(.)) %>% 
  pivot_longer(cols = all_of(sets), names_to = "set", values_to = "value") %>% 
  mutate(n = 750) %>% 
  pivot_wider(names_from = "quantile", values_from = "value")

d20_sample_sim_n500_df <- 
  apply(d20_sample_sim_n500, 2, quantile, probs = c(0.10, 0.50, 0.90)) %>% 
  as.data.frame() %>% 
  select(-seed) %>% 
  mutate(quantile = rownames(.)) %>% 
  pivot_longer(cols = all_of(sets), names_to = "set", values_to = "value") %>% 
  mutate(n = 500) %>% 
  pivot_wider(names_from = "quantile", values_from = "value")

# the raw p-value matrices are large and no longer needed
rm(d20_sample_sim_n1000, d20_sample_sim_n750, d20_sample_sim_n500)

# successively full-outer-merge (all = TRUE) every data frame in df_list
# into a single data frame
merge_df <- function(df_list) {
    Reduce(function(x, y) merge(x, y, all = TRUE), df_list)
    }

# stack the three n-size summaries into one frame
d20_sample_sim_df <- 
  merge_df(list(d20_sample_sim_n500_df, 
                d20_sample_sim_n750_df,
                d20_sample_sim_n1000_df)) 

# round-trip through CSV: persists the (slow) simulation output, and
# read.csv's make.names() turns the "10%"/"50%"/"90%" columns into
# X10./X50./X90. -- the names used by all downstream code
write.csv(x = d20_sample_sim_df,
          file = "d20_sample_sim.csv", 
          row.names = FALSE)
d20_sample_sim_df <- 
  read.csv("d20_sample_sim.csv")

# order the sets by mean median (X50.) p-value across the three n sizes,
# descending -- i.e. 'fairest' d20 first; used as factor levels below
set_d20_ranks <- 
  d20_sample_sim_df |> 
  group_by(set) |> 
  summarize(X50. = mean(X50.), .groups = "drop") |> 
  arrange(desc(X50.)) |> 
  pull(set)

# short axis labels keyed by set name (listed in fairness-rank order)
set_d20_ranks_abbrevmap <- setNames(
  c("rbw", "inf", "aub", "xpns", "ice", "gry", "tl2", "ocn",
    "msh", "tl1", "blu", "jd", "blk", "wht", "grn"),
  c("lt.rainbow", "infernal", "aubergine", "expanse", "ice", "grey",
    "teal2", "ocean", "mushroom", "teal1", "blue", "jade", "black",
    "white", "green")
)

# point-range plot of subsample p-values per set, faceted by subsample size;
# sets ordered fairest-first via set_d20_ranks
d20_sample_sim_df |> 
  mutate(set = factor(set, levels = set_d20_ranks)) |> 
  ggplot(aes(x = set, ymin = X10., y = X50., ymax = X90., color = set)) +
  geom_hline(yintercept = 0.05, linetype = "dashed") +
  geom_pointrange() +
  facet_wrap(facets = vars(n), ncol = 2) +
  labs(title = "p-values of Chi-square test of d20 by set",
       # fixed: the simulations above use nsim = 5e3, not 1,000
       subtitle = "Summary of 5,000 samples of size 500 / 750 / 1000",
       y = "Chi Square p-value",
       caption = "Sampling without replacement; 10/50/90% p-value summary; ref. line at 0.05") +
  scale_color_manual(values = set_fill_map, guide = "none") +
  theme_bw() + 
  theme(panel.grid.major.x = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

6.6 Bayesian Multinomial Analysis (d20)

6.6.1 Analysis and Posterior Probability Density

# brms note for Dirichlet model:
#https://discourse.mc-stan.org/t/dirichlet-regresion-using-brms/8591

# Posterior draws for each d20's per-side probabilities: Dirichlet(counts + 1),
# i.e. a uniform Dirichlet(1,...,1) prior updated with observed roll counts.
set.seed(42)
d20_bayesian_pcts_df <- 
  data.frame(
    set = sets,
    die = "d20"
  ) %>% 
  # attach each set's observed d20 result table (list column `results`)
  left_join(
    y = dice_roll_df_ %>% 
      filter(die == "d20") %>% 
      select(set, die, results = result_tbl),
    by = c("set", "die")
    ) %>% 
  mutate(
    # one long data frame of 1,000 posterior draws per set; `.` is the
    # magrittr placeholder for the data frame entering mutate()
    thetas_df = lapply(1:nrow(.), function(i) {
      set_ <- .$set[i]
      die_ <- .$die[i]
      
      # NOTE(review): rdirichlet() is not exported by any package loaded in
      # the visible setup chunk (pwr/dplyr/ggplot2/tidyr/knitr/kableExtra/
      # shiny); presumably LearnBayes (matches the n/par signature) loaded
      # elsewhere — confirm
      sim_df <- 
        rdirichlet(n = 1e3, par = unlist(unname(.$results[i])) + 1) %>% 
        as.data.frame()
      
      # "_" prefix so pivot_longer(names_prefix = "_") recovers bare side ids
      colnames(sim_df) <- paste0("_", 1:20)
      sim_df %>% 
        pivot_longer(cols = everything(), names_to = "side", values_to = "value",
                     names_prefix = "_") %>% 
        mutate(set = set_, die = die_)
    })
  )

# Flatten the per-set list column of posterior-draw data frames into one
# long data frame. The original extracted each element with
# lapply(1:nrow(df), function(i) df$thetas_df[[i]]) — a verbose identity
# (and a 1:nrow() anti-pattern); bind_rows() can take the list column
# directly, preserving the same row order.
d20_bayesian_pcts_df <- 
  d20_bayesian_pcts_df$thetas_df %>% 
  bind_rows() %>% 
  # factor so sides sort/facet numerically (1..20), not lexically
  mutate(side = factor(side, levels = as.character(1:20)))

# Re-attach a posterior-draw index: rows are ordered set-by-set, and within
# a set each draw contributes 20 consecutive rows (one per side).
d20_bayesian_pcts_df$iter <- local({
  n_sets <- length(unique(d20_bayesian_pcts_df$set))
  n_iters <- nrow(d20_bayesian_pcts_df) / (20 * n_sets)
  rep(rep(seq_len(n_iters), each = 20), times = n_sets)
})
 
# Line type per set for the density plots: dashed for aubergine and
# infernal, solid for everything else.
set_lntyp_map <- local({
  sets_in_order <- c("white", "grey", "black", "green",
                     "teal1", "teal2", "mushroom", "aubergine",
                     "ocean", "ice", "infernal", "blue",
                     "jade", "expanse", "lt.rainbow")
  setNames(ifelse(sets_in_order %in% c("aubergine", "infernal"),
                  "dashed", "solid"),
           sets_in_order)
})
   
# Posterior density of each side's probability, top-5 ranked sets only;
# one facet per side, color/linetype per set via the maps defined above
d20_bayesian_pcts_plot_top5 <- 
  d20_bayesian_pcts_df |> 
  filter(set %in% set_d20_ranks[1:5]) |> 
  ggplot(aes(x = value, color = set, linetype = set)) +
  facet_wrap(facets = vars(side), ncol = 4) +
  geom_vline(xintercept = 1 / 20, linetype = "dashed") +  # fair-die 5% reference
  geom_density(linewidth = 0.8) +
  labs(title = "Bayesian posterior density of d20 roll probabilities by die set (top 5)",
       subtitle = "uniform Dirichlet prior, 1851 rolls per die",
       caption = "Reference line at 5%") +
  scale_x_continuous(labels = scales::percent) +
  scale_color_manual(values = set_fill_map) +
  scale_linetype_manual(values = set_lntyp_map) +
  theme_bw()

# Same density plot for the sets NOT in the top 5 (inverted filter;
# otherwise identical to the top-5 plot above)
d20_bayesian_pcts_plot_nottop5 <- 
  d20_bayesian_pcts_df |> 
  filter(!(set %in% set_d20_ranks[1:5])) |> 
  ggplot(aes(x = value, color = set, linetype = set)) +
  facet_wrap(facets = vars(side), ncol = 4) +
  geom_vline(xintercept = 1 / 20, linetype = "dashed") +  # fair-die 5% reference
  geom_density(linewidth = 0.8) +
  labs(title = "Bayesian posterior density of d20 roll probabilities by die set (not top 5)",
       subtitle = "uniform Dirichlet prior, 1851 rolls per die",
       caption = "Reference line at 5%") +
  scale_x_continuous(labels = scales::percent) +
  scale_color_manual(values = set_fill_map) +
  scale_linetype_manual(values = set_lntyp_map) +
  theme_bw()
# headline + plot for the five best-ranked sets
cat("\nTop 5 d20 sets\n")

Top 5 d20 sets

print(d20_bayesian_pcts_plot_top5)

# headline + plot for the remaining ten sets
cat("\n\nRemaining d20 sets\n")

Remaining d20 sets

print(d20_bayesian_pcts_plot_nottop5)

6.6.2 Credible Intervals from Posterior Probabilities

# 95% credible interval bounds and posterior median of each side's
# probability, per set and side
d20_bayesian_credint_df <- 
  d20_bayesian_pcts_df |> 
  group_by(set, side) |> 
  summarize(
    credint_pct2.5 = quantile(value, 0.025),   # lower 95% CI bound
    credint_pct50 = median(value),             # posterior median
    credint_pct97.5 = quantile(value, 0.975),  # upper 95% CI bound
    .groups = "drop"
  )

# Attach plot-friendly abbreviations and rank-ordered factor levels.
# Uses a direct named lookup into set_d20_ranks_abbrevmap instead of the
# original grep()-per-row: the set name was used as a regex pattern, which
# would error (multiple vapply matches) if any set name were a substring of
# another, and treats regex metacharacters (e.g. the "." in "lt.rainbow")
# specially. Direct indexing is exact, vectorized, and robust.
d20_bayesian_credint_df <- 
  d20_bayesian_credint_df |> 
  mutate(
    set_abbrev = unname(set_d20_ranks_abbrevmap[as.character(set)]),
    # factor levels follow the abbreviation map's (rank) ordering
    set_abbrev = factor(set_abbrev, levels = unname(set_d20_ranks_abbrevmap)),
    set = factor(set, levels = names(set_d20_ranks_abbrevmap))
  )

# plotting the credible intervals: one point-range per set (abbreviated
# labels on the x axis), one facet per side
d20_bayesian_credint_df |> 
  ggplot(aes(x = set_abbrev, 
             ymin = credint_pct2.5, y = credint_pct50, ymax = credint_pct97.5,
             color = set)) + 
  facet_wrap(facets = vars(side), ncol = 4) +
  geom_hline(yintercept = 1 / 20, linetype = "dashed") +  # fair-die 5% reference
  geom_pointrange(linewidth = 1) +
  labs(title = "d20 95% Credible Intervals with Median by Set and Side",
       y = "Posterior Probability (%)",
       caption = "Reference line at 5%") +
  scale_y_continuous(labels = scales::percent) +
  scale_color_manual(values = set_fill_map) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 40, hjust = 1, vjust = 1))

6.6.3 Credible Interval Zoom-in: Rolling 1 vs 20 (and ‘neighboring sides’), and ‘Low’ vs. ‘High’ Results

# deep dive: posterior probability 1 vs. 20
# open squares (shape 22) dodged by side, with the side number printed
# inside each square instead of a legend
d20_bayesian_credint_df |> 
  filter(side %in% c("1", "20")) |>
  ggplot(aes(x = set, 
             ymin = credint_pct2.5, y = credint_pct50, ymax = credint_pct97.5,
             color = set, group = side)) +
  geom_hline(yintercept = 1 / 20, linetype = "dashed") +  # fair-die 5% reference
  geom_pointrange(fatten = 10, shape = 22, linewidth = 1, fill = "white", 
                  position = position_dodge(width = 0.5)) +
  geom_text(aes(label = side), size = 3, color = "black",
            position = position_dodge(width = 0.5), vjust = 0.5) +
  labs(title = "d20 95% Credible Intervals with Median by Set and Side",
       subtitle = "Zoom in for P(1) and P(20)",
       y = "Posterior Probability (%)",
       caption = "Reference line at 5%") +
  scale_y_continuous(labels = scales::percent) +
  scale_color_manual(values = set_fill_map, guide = "none") +
  theme_bw()

# probability 1 or neighboring sides / 20 or neighboring sides
# Credible interval of the SUMMED posterior probability of a group of
# sides, per set. A single helper replaces the two copy-pasted pipelines
# the original had for the "1" and "20" neighbor groups.
credint_for_side_group <- function(sides_, grp_label) {
  d20_bayesian_pcts_df |> 
    filter(side %in% sides_) |> 
    # total probability of the selected sides within each posterior draw
    group_by(set, iter) |> 
    summarize(value = sum(value), .groups = "drop") |> 
    mutate(grp = grp_label) |> 
    group_by(grp, set) |> 
    summarize(
      credint_pct2.5 = quantile(value, 0.025),
      credint_pct50 = median(value),
      credint_pct97.5 = quantile(value, 0.975),
      .groups = "drop"
    )
}

# sides physically adjacent to 1 on the die: 7, 13, 19
d20_bayesian_1_or_nghbr_df <- 
  credint_for_side_group(c(1, 7, 13, 19), "1 or neighbor")

# sides physically adjacent to 20 on the die: 2, 8, 14
d20_bayesian_20_or_nghbr_df <- 
  credint_for_side_group(c(20, 2, 8, 14), "20 or neighbor")

# Combine the two neighbor-group summaries; with identical columns and
# disjoint grp values, a full join by all shared columns stacks the rows
d20_bayesian_1_20_nghbr_df <-
  full_join(d20_bayesian_1_or_nghbr_df, d20_bayesian_20_or_nghbr_df,
            by = intersect(colnames(d20_bayesian_1_or_nghbr_df),
                           colnames(d20_bayesian_20_or_nghbr_df)))
# point-ranges per set, dodged and shaped by group (down/up triangles)
d20_bayesian_1_20_nghbr_df |> 
  mutate(set = factor(set, levels = names(set_d20_ranks_abbrevmap))) |> 
  ggplot(aes(x = set, 
             ymin = credint_pct2.5, y = credint_pct50, ymax = credint_pct97.5,
             color = set, shape = grp)) +
  geom_hline(yintercept = 4 / 20, linetype = "dashed") +  # 4 fair sides = 20%
  geom_pointrange(fatten = 5, linewidth = 1, fill = "white", 
                  position = position_dodge(width = 0.5)) +
  labs(title = "d20 95% Credible Intervals with Median by Set and Side",
       subtitle = "Zoom in for 1/20 & their 'neighboring' sides",
       y = "Posterior Probability (%)",
       caption = paste("Reference line at", paste0(4 / 20 * 100, "%"),
                       "\n1-neighboring sides: 7, 13, 19\n20-neighboring sides: 2, 8, 14"),
       shape = NULL) +
  scale_y_continuous(labels = scales::percent) +
  scale_color_manual(values = set_fill_map, guide = "none") +
  scale_shape_manual(values = c(25, 24)) +
  theme_bw()

# probability 1-3 vs 18-20
btm_top <- 3

# Credible interval of the summed posterior probability of the lowest /
# highest `btm_top` sides, per set. A single helper replaces the two
# copy-pasted pipelines; it also resolves an inconsistency in the original,
# where the "top" pipeline grouped by (set, die, iter) but the "bottom" by
# (set, iter) — `die` is constant ("d20") here, so results are unchanged.
lohi_credint <- function(sides_, grp_label) {
  d20_bayesian_pcts_df |> 
    filter(side %in% sides_) |> 
    # total probability of the selected sides within each posterior draw
    group_by(set, iter) |> 
    summarize(value = sum(value), .groups = "drop") |> 
    mutate(grp = grp_label) |> 
    group_by(grp, set) |> 
    summarize(
      credint_pct2.5 = quantile(value, 0.025),
      credint_pct50 = median(value),
      credint_pct97.5 = quantile(value, 0.975),
      .groups = "drop"
    )
}

d20_bayesian_btm_df <- 
  lohi_credint(1:btm_top, paste("1 to", btm_top))

d20_bayesian_top_df <- 
  lohi_credint((20 - btm_top + 1):20, paste((20 - btm_top + 1), "to 20"))

# Combine the low/high summaries; identical columns + disjoint grp values
# mean the full join by all shared columns simply stacks the rows
d20_bayesian_lohi_df <-
  full_join(d20_bayesian_btm_df, d20_bayesian_top_df,
            by = intersect(colnames(d20_bayesian_btm_df),
                           colnames(d20_bayesian_top_df)))
# point-ranges per set, dodged and shaped by group (down/up triangles)
d20_bayesian_lohi_df |> 
  mutate(set = factor(set, levels = names(set_d20_ranks_abbrevmap))) |> 
  ggplot(aes(x = set, 
             ymin = credint_pct2.5, y = credint_pct50, ymax = credint_pct97.5,
             color = set, shape = grp)) +
  geom_hline(yintercept = btm_top / 20, linetype = "dashed") +  # fair-die reference
  geom_pointrange(fatten = 5, linewidth = 1, fill = "white", 
                  position = position_dodge(width = 0.5)) +
  labs(title = "d20 95% Credible Intervals with Median by Set and Side",
       subtitle = paste("Zoom in for lowest and highest", btm_top, "sides"),
       y = "Posterior Probability (%)",
       caption = paste("Reference line at", paste0(btm_top / 20 * 100, "%")),
       shape = NULL) +
  scale_y_continuous(labels = scales::percent) +
  scale_color_manual(values = set_fill_map, guide = "none") +
  scale_shape_manual(values = c(25, 24)) +
  theme_bw()

6.6.4 Median Posterior Probability of (d20 side) by Set

Dynamic Shiny app

Note: this was an experiment in embedding a Shiny app in an R Markdown file; the HTML file needs to be rendered locally from the .Rmd file for this to work. IF DOING THIS, ADD ‘runtime: shiny’ AS THE LAST LINE OF THE .RMD YAML HEADER

# Shiny app radar plot for median posterior probability of sides per d20
shinyApp(

  ui = fluidPage(
    # one or more sets to highlight in color (choices sorted alphabetically)
    selectInput("set", "Dice set(s) to emphasize:",
                choices = unique(d20_bayesian_pcts_df$set)[order(unique(d20_bayesian_pcts_df$set))],
                selected = unique(d20_bayesian_pcts_df$set)[order(unique(d20_bayesian_pcts_df$set))][1],
                multiple = TRUE),
    checkboxInput("arrange_desc", "Arrange sides by median posterior probability"),
    plotOutput("radar_plot", width = "100%")
  ),

  server = function(input, output) {
    # median posterior probability per (die, set, side); computed once
    median_prob_df <- 
      d20_bayesian_pcts_df |> 
      group_by(die, set, side) |> 
      summarize(median_post_prob = median(value), .groups = "drop")
    
    # side ordering: natural 1..20, or descending by the selected sets'
    # mean of median posterior probabilities when the checkbox is ticked
    sorted_side_order <- 
      reactive({
        if (input$arrange_desc) {
          median_prob_df |> 
            filter(set %in% input$set) |>
            group_by(side) |> 
            summarize(mean_medn_prob = mean(median_post_prob), 
                      .groups = "drop") |> 
            # NOTE(review): as.integer() on this factor returns level codes,
            # which equal the side labels only because levels are "1".."20"
            # in order — fragile if the level order ever changes
            mutate(side = as.integer(side)) |> 
            arrange(desc(mean_medn_prob)) |> 
            pull(side)
          } else { 1:20 }
      })
    
    # all sets, re-leveled to the chosen side order (grey background layer)
    medn_prob_df <- 
      reactive({
        mutate(median_prob_df, side = factor(side, levels = sorted_side_order()))
      })
      
    # only the selected set(s) (colored foreground layer)
    emph_prob_df <-  
      reactive({ 
        median_prob_df |> 
        filter(set %in% input$set) |> 
        mutate(side = factor(side, levels = sorted_side_order()))
        }) 
    
    output$radar_plot = renderPlot({
        ggplot(data = emph_prob_df(), aes(x = side, y = median_post_prob, fill = set, group = set)) +
        geom_hline(yintercept = 1 / 20, linetype = "dashed") +
        geom_line(data = medn_prob_df(), color = "grey") +
        geom_point(data = medn_prob_df(), color = "grey") +
        geom_line(data = emph_prob_df(), linewidth = 0.7, color = "grey30") +
        geom_point(data = emph_prob_df(), size = 3, shape = 21, color = "grey30", stroke = 0.2) +
        labs(title = "Radar-like Plot of Median Posterior Probability of (side)",
             subtitle = "All d20 in grey, selected set(s) in color",
             x = NULL, y = NULL, color = NULL,
             caption = "Reference line at 5%") +
        scale_y_continuous(labels = scales::percent) +
        scale_fill_manual(values = set_fill_map) +
        theme_bw() +
        theme(axis.text.x = element_text(size = 10),
              axis.text.y = element_text(size = 10),
              plot.caption = element_text(size = 10),
              legend.text = element_text(size = 10)) +
        coord_polar()  # wraps the x axis around to make the radar layout
    })
  },

  options = list(height = 550)
)
Shiny applications not supported in static R Markdown documents

Static Plots

# Static (non-Shiny) radar plots: one per set, in set_d20_ranks order,
# with that set emphasized in color over all sets in grey.

# Median posterior probability per (die, set, side). This is identical for
# every plot, so compute it once instead of re-summarizing the full draw
# data frame inside each lapply() iteration (loop-invariant hoist).
d20_median_prob_df <- 
  d20_bayesian_pcts_df |> 
  group_by(die, set, side) |> 
  summarize(median_post_prob = median(value), .groups = "drop")

mdn_post_prob_plts <- 
  lapply(seq_along(set_d20_ranks), function(i) {
    # side order: descending by the emphasized set's median probability.
    # NOTE: as.integer() on the side factor returns level codes, which equal
    # the side labels because levels are "1".."20" in order.
    sorted_side_order <- 
      d20_median_prob_df |> 
      filter(set == set_d20_ranks[i]) |>
      group_by(side) |> 
      summarize(mean_medn_prob = mean(median_post_prob), 
                .groups = "drop") |> 
      mutate(side = as.integer(side)) |> 
      arrange(desc(mean_medn_prob)) |> 
      pull(side)
    
    # all sets, re-leveled to the chosen order (grey background layer)
    medn_prob_df <- 
      mutate(d20_median_prob_df, side = factor(side, levels = sorted_side_order))
      
    # the emphasized set only (colored foreground layer)
    emph_prob_df <-  
      d20_median_prob_df |> 
      filter(set == set_d20_ranks[i]) |> 
      mutate(side = factor(side, levels = sorted_side_order))
    
    ggplot(data = emph_prob_df, aes(x = side, y = median_post_prob, fill = set, group = set)) +
        geom_hline(yintercept = 1 / 20, linetype = "dashed") +
        geom_line(data = medn_prob_df, color = "grey") +
        geom_point(data = medn_prob_df, color = "grey") +
        geom_line(data = emph_prob_df, linewidth = 0.7, color = "grey30") +
        geom_point(data = emph_prob_df, size = 3, shape = 21, color = "grey30", stroke = 0.2) +
        labs(title = "Radar-like Plot of Median Posterior Probability of (side)",
             subtitle = "All d20 in grey, selected set in color",
             x = NULL, y = NULL, color = NULL,
             caption = "Reference line at 5%") +
        scale_y_continuous(labels = scales::percent) +
        scale_fill_manual(values = set_fill_map) +
        theme_bw() +
        coord_polar()  # wraps the x axis around to make the radar layout
  })

# render each static radar plot, in rank order
for (radar_plt in mdn_post_prob_plts) {
  print(radar_plt)
}